################################################
#	Spry and Nunnally (2025)
# Replication Data for "Modern American Dilemma"
# Confirmatory Factor Analysis
################################################

# Front Matter ------------------------------------------------------------

rm(list=ls())		# clear objects in memory

setwd("/Users/amberspry/Dropbox/Research/Data/2016 CMPS/Analysis")
# Data imported from 2016 CMPS

source("2026_sprynunnally_source.R")
library(MVN)
library(lavaan)
library(survey)
library(knitr)
library(dplyr)
library(tidyr)

# Generate Descriptive Stats Tables ---------------------------------------

# WEIGHTED descriptive stats table for ALL respondents using Hmisc

sub.bw %>% 
  select(fa.voting, fa.protest, fa.contact, fa.rioting, fa.discwhite, fa.discblack, fa.belong, fa.outsider, fa.exclude, weight) %>% 
  gather("Variable", "value", -weight) %>% 
  group_by(Variable) %>% 
  summarise(Mean=wtd.mean(value, weights = weight, na.rm=TRUE), 
            SD=sqrt(wtd.var(value, weights = weight, na.rm=TRUE)), 
            min=min(value, na.rm=TRUE), 
            max=max(value, na.rm=TRUE), 
            '% Missing'=100*length(which(is.na(value)))/n()) %>% 
  kable(digits=2, format="pandoc", caption="Descriptive Statistics for Observed Variables")

# WEIGHTED descriptive stats table - Black respondents

sub.black %>% 
  select(fa.voting, fa.protest, fa.contact, fa.rioting, fa.discwhite, fa.discblack, fa.belong, fa.outsider, fa.exclude, weight) %>% 
  gather("Variable", "value", -weight) %>% 
  group_by(Variable) %>% 
  summarise(Mean=wtd.mean(value, weights = weight, na.rm=TRUE), 
            SD=sqrt(wtd.var(value, weights = weight, na.rm=TRUE)), 
            min=min(value, na.rm=TRUE), 
            max=max(value, na.rm=TRUE), 
            '% Missing'=100*length(which(is.na(value)))/n()) %>% 
  kable(digits=2, format="pandoc", caption="Descriptive Statistics for Observed Variables")

# WEIGHTED descriptive stats table - WHITE respondents

sub.white %>% 
  select(fa.voting, fa.protest, fa.contact, fa.rioting, fa.discwhite, fa.discblack, fa.belong, fa.outsider, fa.exclude, weight) %>% 
  gather("Variable", "value", -weight) %>% 
  group_by(Variable) %>% 
  summarise(Mean=wtd.mean(value, weights = weight, na.rm=TRUE), 
            SD=sqrt(wtd.var(value, weights = weight, na.rm=TRUE)), 
            min=min(value, na.rm=TRUE), 
            max=max(value, na.rm=TRUE), 
            '% Missing'=100*length(which(is.na(value)))/n()) %>% 
  kable(digits=2, format="pandoc", caption="Descriptive Statistics for Observed Variables")


# Define CFA Model --------------------------------------------------------

# Exclude dichotomous variables from the model

amer.model <- ' values =~ fa.voting + fa.protest + fa.contact + fa.rioting
                practice =~ fa.discwhite + fa.discblack
                discrim =~ fa.belong + fa.outsider + fa.exclude'


# WEIGHTED MODEL for ALL respondents

fit.weighted <- lavaan::cfa(amer.model,
                  data = sub.bw,
                  ordered = c("values", 
                              "practice", 
                              "discrim"),
                  sampling.weights = "weight")

summary(fit.weighted, standardized = TRUE, fit.measures = TRUE, rsq = TRUE)     # View results, weights are already incorporated

parameterEstimates(fit.weighted, standardized = TRUE) %>%
  filter(op == "=~") %>%
  mutate(stars = ifelse(pvalue < .001, "***",
                        ifelse(pvalue < .01, "**",
                               ifelse(pvalue < .05, "*", "")))) %>%
  select('Latent Factor' = lhs,
         Indicator = rhs,
         B = est,
         SE = se,
         Z = z,
         'p-value' = pvalue,
         Beta = std.all,
         sig = stars) %>%
  kable(digits = 3, format = "pandoc", caption = "Weighted Factor Loadings (Black and White Respondents)")


# WEIGHTED CFA for Black Respondents

fit.b.weighted <- lavaan::cfa(amer.model,
                    data = sub.black,
                    ordered = c("values", 
                                "practice", 
                                "discrim"),
                    sampling.weights = "weight")

summary(fit.b.weighted, standardized = TRUE, fit.measures = TRUE, rsq = TRUE)     # View results, weights are already incorporated

parameterEstimates(fit.b.weighted, standardized = TRUE) %>%
  filter(op == "=~") %>%
  mutate(stars = ifelse(pvalue < .001, "***",
                        ifelse(pvalue < .01, "**",
                               ifelse(pvalue < .05, "*", "")))) %>%
  select('Latent Factor' = lhs,
         Indicator = rhs,
         B = est,
         SE = se,
         Z = z,
         'p-value' = pvalue,
         Beta = std.all,
         sig = stars) %>%
  kable(digits = 3, format = "pandoc", caption = "Weighted Factor Loadings (Black Respondents)")


# WEIGHTED CFA for White Respondents

fit.w.weighted <- lavaan::cfa(amer.model,
                    data = sub.white,
                    ordered = c("values", 
                                "practice", 
                                "discrim"),
                    sampling.weights = "weight")

summary(fit.w.weighted, standardized = TRUE, fit.measures = TRUE, rsq = TRUE)     # View results, weights are already incorporated

parameterEstimates(fit.w.weighted, standardized = TRUE) %>%
  filter(op == "=~") %>%
  mutate(stars = ifelse(pvalue < .001, "***",
                        ifelse(pvalue < .01, "**",
                               ifelse(pvalue < .05, "*", "")))) %>%
  select('Latent Factor' = lhs,
         Indicator = rhs,
         B = est,
         SE = se,
         Z = z,
         'p-value' = pvalue,
         Beta = std.all,
         sig = stars) %>%
  kable(digits = 3, format = "pandoc", caption = "Weighted Factor Loadings (White Respondents)")


# Weighted Latent Factor Correlation Tables -------------------------------

# For all respondents

parameterEstimates(fit.weighted, standardized=TRUE) %>% 
  filter(op == "~~", 
         lhs %in% c("values", "practice", "discrim"), 
         !is.na(pvalue)) %>% 
  mutate(stars = ifelse(pvalue < .001, "***", 
                        ifelse(pvalue < .01, "**", 
                               ifelse(pvalue < .05, "*", "")))) %>% 
  select('Factor 1'=lhs, 
         'Factor 2'=rhs, 
         Correlation=est, 
         sig=stars) %>% 
  kable(digits = 2, format="pandoc", caption="Latent Factor Correlations (Black + White)")


# For Black respondents

parameterEstimates(fit.b.weighted, standardized=TRUE) %>% 
  filter(op == "~~", 
         lhs %in% c("values", "practice", "discrim"), 
         !is.na(pvalue)) %>% 
  mutate(stars = ifelse(pvalue < .001, "***", 
                        ifelse(pvalue < .01, "**", 
                               ifelse(pvalue < .05, "*", "")))) %>% 
  select('Factor 1'=lhs, 
         'Factor 2'=rhs, 
         Correlation=est, 
         sig=stars) %>% 
  kable(digits = 2, format="pandoc", caption="Latent Factor Correlations (Black)")


# For white respondents

parameterEstimates(fit.w.weighted, standardized=TRUE) %>% 
  filter(op == "~~", 
         lhs %in% c("values", "practice", "discrim"), 
         !is.na(pvalue)) %>% 
  mutate(stars = ifelse(pvalue < .001, "***", 
                        ifelse(pvalue < .01, "**", 
                               ifelse(pvalue < .05, "*", "")))) %>% 
  select('Factor 1'=lhs, 
         'Factor 2'=rhs, 
         Correlation=est, 
         sig=stars) %>% 
  kable(digits = 2, format="pandoc", caption="Latent Factor Correlations (White)")


# Plot Distributions ------------------------------------------------------

fa.hist <- svydesign(ids = ~1, weights = ~weight, data = sub.bw)
x.vars1 <- c("fa.voting", "fa.protest", "fa.contact", "fa.rioting", "fa.discwhite", "fa.discblack", "fa.belong", "fa.outsider", "fa.exclude")
x.vars <- sub.bw[x.vars1]

sub.bw %>%
  select(all_of(x.vars1), weight) %>%
  pivot_longer(cols = -weight, names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(x = Value, weight = weight)) +
  geom_histogram(bins = 20, fill = "gray60", color = "black") +
  facet_wrap(~Variable, scales = "free") +
  theme_minimal() +
  labs(title = "Weighted Distribution of Factor Analysis Variables",
       x = "Value", y = "Weighted Frequency")
